library(dplyr)
library(ggplot2)
library(bibliometrix) 
library(forcats)
library(reshape2)
library(wordcloud2) 
library(treemap)
library(ggpp)
library(Cairo)
library(webshot)
library(htmlwidgets)

# Select the working directory: use the developer's Windows project folder
# when it exists, otherwise fall back to the server data directory.
# dir.exists() is called without a trailing slash because file.exists() is
# documented not to accept a trailing slash/backslash on directory names on
# Windows, which could make the local folder look absent.
ismy <- dir.exists("F:/rproject/pmwosplot")
if (ismy) {
  setwd("F:/rproject/pmwosplot")
} else {
  setwd("/sas/pubmedr/gcr/20231116")
}

# Reference: https://www.bibliometrix.org/vignettes/Introduction_to_bibliometrix.html
# bibliometrix
# 2023-11-14 15:56:20 - test run: drawing charts for pm.
# Goal (requested by the boss): work with pm so that one input data source
# produces many charts.
file <- "pubmed-HANDD-set.txt"
# file <- "https://www.bibliometrix.org/datasets/pubmed_txt.txt"
getwd()
M <- convert2df(file = file, dbsource = "pubmed", format = "pubmed")

# Number of top entries requested from the summary: 10, or the number of
# rows of M when M has fewer than 10 rows.
topS <- if (dim(M)[1] >= 10) 10 else dim(M)[1]

print(topS)
results <- biblioAnalysis(M, sep = ";")
options(width = 100)
S <- summary(object = results, k = topS, pause = FALSE)


# Reference article: https://blog.csdn.net/qq_37364789/article/details/115395649
# API lookup page: http://www.idata8.com/rpackage/bibliometrix/00Index.html
# biblioshiny()
# The built-in plot call below raised an error, so it is no longer used;
# the charts are drawn directly with ggplot2 instead.
# plot(x = results, k = 10, pause = T)
# biblioshiny()

# Annual production table: rename the two columns, then make both numeric.
# The original converted MostRelSources$Articles at this point (a conversion
# that is repeated further down), leaving AnnualProduction$Articles
# unconverted; the conversion now targets the table being prepared here.
AnnualProduction <- as.data.frame(S$AnnualProduction)
# colnames(AnnualProduction)
colnames(AnnualProduction) <- c("Year", "Articles")
# skim(AnnualProduction)
AnnualProduction$Articles <- as.numeric(as.character(AnnualProduction$Articles))
AnnualProduction$Year <- as.numeric(as.character(AnnualProduction$Year))

# Most relevant sources table: the columns are renamed before any column is
# referenced by name, so the conversions do not depend on the names that
# summary() happened to assign.
MostRelSources <- as.data.frame(S$MostRelSources)
colnames(MostRelSources) <- c("Sources", "Articles")
MostRelSources$Sources <- as.character(MostRelSources$Sources)
MostRelSources$Articles <- as.numeric(as.character(MostRelSources$Articles))

# Journals
# [Most Relevant Sources: most relevant journals, N. of Documents]
# Top ten journals by number of published articles.
# most journal
Cairo::CairoJPEG(
  file = "MostRelSources.jpeg",
  width = 10, height = 10, units = "in", dpi = 97
)
# print() is explicit so the chart is also rendered when the script is run
# through source(), where top-level values are not auto-printed and the
# JPEG would otherwise stay empty.
print(
  ggplot(MostRelSources, aes(x = fct_reorder(Sources, Articles), y = Articles)) +
    # Bar chart: fill sets the bar fill colour, colour sets the bar outline
    geom_bar(stat = "identity", fill = "lightblue", colour = "black") +
    # Font size
    theme(text = element_text(size = 15)) +
    # Title and axis labels
    labs(title = "MostRelSources", y = "Articles", x = "Journal") +
    # Swap x and y so the bars are horizontal
    coord_flip()
)
dev.off()

# [Most Cited Sources: journals with the most total citations]
# Not produced: the export contains no citation counts.

# [Source Impact: journal impact ordered by H-index]
# Not produced: pm does not provide this parameter.

# [Source Growth: journals over time]
# Computes the documents published per year by the top sources.
topSo <- sourceGrowth(M, top = 10, cdf = TRUE)
# Reshape to long form: one row per (Year, source) pair.
DFtopSo <- melt(topSo, id = "Year")
Cairo::CairoJPEG(
  file = "SourceGrowth.jpeg",
  width = 10, height = 10, units = "in", dpi = 97
)
# print() is explicit so the chart is also rendered when the script is run
# through source(), where top-level values are not auto-printed.
print(
  ggplot(DFtopSo, aes(Year, value, group = variable, color = variable)) +
    geom_line(stat = "identity", size = 0.9) +
    theme_classic() +
    theme(
      # Legend text size
      legend.text = element_text(size = 15),
      # NOTE(review): the original comment describes this as the plot title
      # position/size, but the element styled is plot.caption and no caption
      # is set below - confirm whether plot.title was intended.
      plot.caption = element_text(hjust = 0.5, size = 16),
      axis.text = element_text(size = 15),
      # Axis title size, in bold
      axis.title = element_text(size = 15, face = "bold")
    ) +
    # x-axis ticks: one break every 20 years starting at the first year
    scale_x_continuous(breaks = seq(min(DFtopSo$Year), max(DFtopSo$Year), 20)) +
    labs(title = "SourceGrowth")
)
dev.off()
# Authors

# [Most Relevant Authors]
# The original note mentions ordering by Articles Fractionalized; the code
# below orders by Articles.
# Top ten authors by number of articles.
MostProdAuthors <- as.data.frame(S$MostProdAuthors)
# Drop the third column so that three columns remain for the rename below.
# NOTE(review): if the summary table lists a second, separately ranked
# author column in position 3, the fractionalized counts kept here may not
# belong to the authors in column 1 - confirm against the summary output.
MostProdAuthors <- MostProdAuthors[, -3]
colnames(MostProdAuthors) <- c("Authors", "Articles", "ArticlesFractionalized")
MostProdAuthors$Authors <- as.character(MostProdAuthors$Authors)
MostProdAuthors$Articles <- as.numeric(as.character(MostProdAuthors$Articles))
MostProdAuthors$ArticlesFractionalized <-
  as.numeric(as.character(MostProdAuthors$ArticlesFractionalized))
MostProdAuthors <- arrange(MostProdAuthors, -Articles)
# One-row tibble carrying the whole table in a list column; geom_table()
# uses it to draw the table as an inset at x = 16, y = 0.
df <- tibble(x = 16, y = 0, autb = list(MostProdAuthors))
# Namespaced like every other device call in this script.
Cairo::CairoJPEG(
  file = "Most Relevant Authors.jpeg",
  width = 10, height = 10, units = "in", dpi = 97
)
# print() is explicit so the chart is also rendered when the script is run
# through source(), where top-level values are not auto-printed.
print(
  ggplot(MostProdAuthors, aes(x = fct_reorder(Authors, Articles), y = Articles)) +
    geom_bar(stat = "identity", fill = "lightblue", colour = "red") +
    # Font size
    theme(text = element_text(size = 15)) +
    labs(title = "Most Relevant Authors") +
    # Swap x and y so the bars are horizontal
    coord_flip() +
    geom_table(data = df, aes(x = x, y = y, label = autb))
)
dev.off()

# [Most Local Cited Authors: most cited by peers] - not available
# [Author Productivity through Lotka's Law] - not available

# [Author Impact H-index] - not available
# Hindex(M, field = "AU", elements, sep = ";", years = 10)
# Authorsres<-as.data.frame(results$Authors)

# Authors' publications over time (custom addition).
# The device is opened first so that the chart drawn by
# authorProdOverTime(graph = TRUE) lands in the JPEG.
Cairo::CairoJPEG(
  file = "authorProdOverTime.jpeg",
  width = 10,
  height = 10,
  units = "in",
  dpi = 97
)
topAU <- authorProdOverTime(M, k = 10, graph = TRUE)
dev.off()

# [Most Relevant Affiliations: the ten institutions with the most articles]
Affiliations <- as.data.frame(results$Affiliations)
colnames(Affiliations) <- c("Affiliations", "Articles")
Affiliations$Affiliations <- as.character(Affiliations$Affiliations)
Affiliations$Articles <- as.numeric(as.character(Affiliations$Articles))
# Keep the first ten institutions. head() returns at most ten rows, whereas
# indexing with 1:10 pads the result with all-NA rows when fewer than ten
# affiliations are present.
Affiliations <- head(Affiliations, 10)
# One-row tibble carrying the whole table in a list column; geom_table()
# uses it to draw the table as an inset at x = 16, y = 0.
dfAffiliations <- tibble(x = 16, y = 0, autb = list(Affiliations))
Cairo::CairoJPEG(
  file = "Most Relevant Affiliations.jpeg",
  width = 12, height = 10, units = "in", dpi = 97
)
# print() is explicit so the chart is also rendered when the script is run
# through source(), where top-level values are not auto-printed.
print(
  ggplot(Affiliations, aes(x = fct_reorder(Affiliations, Articles), y = Articles)) +
    geom_bar(stat = "identity", fill = "yellow", colour = "red") +
    # Font size
    theme(text = element_text(size = 15)) +
    labs(title = "Most Relevant Affiliations") +
    # Swap x and y so the bars are horizontal
    coord_flip() +
    geom_table(data = dfAffiliations, aes(x = x, y = y, label = autb))
)
dev.off()

# year article trend: number of articles published per year
Cairo::CairoJPEG(
  file = "year article trend.jpeg",
  width = 10, height = 10, units = "in", dpi = 97
)
# print() is explicit so the chart is also rendered when the script is run
# through source(), where top-level values are not auto-printed.
print(
  ggplot(AnnualProduction, aes(x = Year, y = Articles)) +
    # Bar chart: fill sets the bar fill colour, colour sets the bar outline
    geom_bar(stat = "identity", fill = "lightblue", colour = "black") +
    # Font size
    theme(text = element_text(size = 15)) +
    # Title
    labs(title = "year article trend") +
    # x-axis ticks: one break every 20 years starting at the first year
    scale_x_continuous(
      breaks = seq(min(AnnualProduction$Year), max(AnnualProduction$Year), 20)
    )
)
dev.off()
# Countries with the most articles
MostProdCountries <- as.data.frame(S$MostProdCountries)
# NOTE(review): the `$` accessors are kept on purpose. `$` allows partial
# name matching when reading from a data frame, so replacing it with `[[`
# could change which column is read if the summary's column names carry
# padding - confirm before refactoring.
MostProdCountries$Country <- as.character(MostProdCountries$Country)
MostProdCountries$Articles <- as.character(MostProdCountries$Articles)
MostProdCountriesUse <- MostProdCountries[, c("Country", "Articles")]
MostProdCountriesUse$Articles <- as.numeric(MostProdCountriesUse$Articles)
Cairo::CairoJPEG(
  file = "Country Scientific Production.jpeg",
  width = 10, height = 10, units = "in", dpi = 97
)
# print() is explicit so the chart is also rendered when the script is run
# through source(), where top-level values are not auto-printed.
print(
  ggplot(MostProdCountriesUse, aes(x = fct_reorder(Country, Articles), y = Articles)) +
    # Bar chart: fill sets the bar fill colour, colour sets the bar outline
    geom_bar(stat = "identity", fill = "lightblue", colour = "black") +
    # Font size
    theme(text = element_text(size = 15)) +
    # Title and axis labels
    labs(title = "Country Scientific Production", y = "Articles", x = "Country") +
    # Swap x and y so the bars are horizontal
    coord_flip()
)
dev.off()


# Words
# [Most Frequent Words - Keywords Plus]
MostRelKeywords <- as.data.frame(S$MostRelKeywords)

# Four columns: author keywords with their counts, then Keywords Plus with
# their counts. Both count columns are given the name 'Articles'; only
# columns 3 and 4 are used below, so the name is unique within the subset.
colnames(MostRelKeywords) <- c("AuthorKeywords", "Articles", "KeywordsPlus", "Articles")
MostRelKeywordsPlus <- MostRelKeywords[, c(3:4)]
# MostRelKeywordsAuthor<- MostRelKeywords[,c(1:2)]
MostRelKeywordsPlus$KeywordsPlus <- as.character(MostRelKeywordsPlus$KeywordsPlus)
MostRelKeywordsPlus$Articles <- as.character(MostRelKeywordsPlus$Articles)
# MostRelKeywordsAuthor$AuthorKeywords<-as.character(MostRelKeywordsAuthor$AuthorKeywords)
# MostRelKeywordsAuthor$Articles<-as.character(MostRelKeywordsAuthor$Articles)

MostRelKeywordsPlus$Articles <- as.numeric(MostRelKeywordsPlus$Articles)
# MostRelKeywordsAuthor$Articles <- as.numeric(MostRelKeywordsAuthor$Articles)
Cairo::CairoJPEG(
  file = "MostRelKeywordsPlus.jpeg",
  width = 10, height = 10, units = "in", dpi = 97
)
# print() is explicit so the chart is also rendered when the script is run
# through source(), where top-level values are not auto-printed.
print(
  ggplot(MostRelKeywordsPlus, aes(x = fct_reorder(KeywordsPlus, Articles), y = Articles)) +
    # Bar chart: fill sets the bar fill colour, colour sets the bar outline
    geom_bar(stat = "identity", fill = "lightblue", colour = "black") +
    # Font size
    theme(text = element_text(size = 15)) +
    # Title and axis labels
    labs(title = "Keywords Plus", y = "Articles", x = "key") +
    # Swap x and y so the bars are horizontal
    coord_flip()
)
dev.off()
# Cairo::CairoJPEG(file="Authors keywords.jpeg", width=10, height=10,units="in",dpi=97)
# ggplot(MostRelKeywordsAuthor,  aes(x =  fct_reorder(AuthorKeywords, (Articles)), y = Articles))  +
#   # 条形图函数：fill设置条形图填充色，colour设置条形图边界颜色
#   geom_bar(stat = "identity", fill = "lightblue", colour = "black") +
#   # 修改别名
#   labs(title = "Authors keywords",y = 'Articles',x = 'key' )+
#   # x y 反转
#   coord_flip()
# 
# dev.off()

# Wordcloud by Keywords Plus. Original note: not usable on the server
# version, parked here for now.
MostRelKeywordsPlusTemp <- wordcloud2(
  MostRelKeywordsPlus,
  size = 0.5,                  # font size
  fontFamily = "Segoe UI",     # font family
  fontWeight = "bold",         # font weight
  color = "random-light",      # word colour scheme
  backgroundColor = "grey"
)

# Write the widget to an HTML file, then capture that page as an image.
htmltools::save_html(
  MostRelKeywordsPlusTemp,
  file = "MostRelKeywordsPlusTemp.html"
)
# The saveWidget() call below could not generate the file on Linux, so it
# was replaced by save_html() above.
#saveWidget(MostRelKeywordsPlusTemp, file = "MostRelKeywordsPlusTemp.html")
webshot(
  "MostRelKeywordsPlusTemp.html",
  "MostRelKeywordsPluswordclouds.jpeg"
)
# MostRelKeywordsAuthorwordcloudsTemp<-wordcloud2(MostRelKeywordsAuthor,
#            size=0.5,#字体大小
#            fontFamily = 'Segoe UI',#字体
#            fontWeight = 'bold',#字体粗细
#            color='random-light',#字体颜色设置
#            backgroundColor = 'grey')

# htmltools::save_html(MostRelKeywordsAuthorwordcloudsTemp,file = "MostRelKeywordsAuthorwordcloudsTemp.html")
#saveWidget(MostRelKeywordsAuthorwordcloudsTemp, file = "MostRelKeywordsAuthorwordcloudsTemp.html")
# webshot("MostRelKeywordsAuthorwordcloudsTemp.html", "MostRelKeywordsAuthorwordclouds.jpeg")


# Treemap: the size of each tile is determined by the article count.
Cairo::CairoJPEG(
  file = "Tree Map by Keywords Plus.jpeg",
  width = 10, height = 10, units = "in", dpi = 97
)
treemap(
  MostRelKeywordsPlus,
  index = c("KeywordsPlus", "Articles"),
  vSize = "Articles",
  vColor = "Articles",
  type = "color",
  title = "Tree Map by Keywords Plus",
  palette = "RdBu"
)
dev.off()

# Keyword growth by year, top ten keywords.
# The result is stored under the same name as the function that produced
# it; the name is kept because it is part of the script's workspace.
KeywordGrowth <- KeywordGrowth(M, Tag = "ID", sep = ";", top = 10, cdf = TRUE)
# Reshape to long form: one row per (Year, keyword) pair.
DFKeywordGrowth <- melt(KeywordGrowth, id = "Year")
colnames(DFKeywordGrowth) <- c("Year", "keyword", "value")
Cairo::CairoJPEG(
  file = "KeywordGrowth.jpeg",
  width = 10, height = 10, units = "in", dpi = 97
)
# print() is explicit so the chart is also rendered when the script is run
# through source(), where top-level values are not auto-printed.
print(
  ggplot(DFKeywordGrowth, aes(Year, value, group = keyword, color = keyword)) +
    geom_line() +
    labs(title = "KeywordGrowth")
)
dev.off()

# Country collaboration: the more connecting lines, the more collaboration.
McON <- metaTagExtraction(M, Field = "AU_CO", sep = ";")
NetMatrixcON <- biblioNetwork(
  McON,
  analysis = "collaboration",
  network = "countries",
  sep = ";"
)
# All rows of the collaboration matrix are requested as vertices.
# NOTE(review): networkPlot() runs before the JPEG device is opened, so
# anything it draws itself does not go into the file below - confirm that
# only the plot() of the returned graph is meant to be saved.
netcollaboration <- networkPlot(
  NetMatrixcON,
  n = dim(NetMatrixcON)[1],
  Title = "Country Collaboration",
  type = "circle",
  size = TRUE,
  remove.multiple = FALSE,
  labelsize = 1.2,
  cluster = "none"
)
Cairo::CairoJPEG(
  file = "Country Collaboration.jpeg",
  width = 10, height = 10, units = "in", dpi = 97
)
plot(netcollaboration$graph)
dev.off()


# Keyword co-occurrence network: keywords that appear together.
NetKeyWordMatrix <- biblioNetwork(
  M,
  analysis = "co-occurrences",
  network = "keywords",
  sep = ";"
)
# Build the network (30 vertices requested), then save a plot of the
# returned graph.
netKeyWord <- networkPlot(
  NetKeyWordMatrix,
  normalize = "association",
  weighted = TRUE,
  n = 30,
  Title = "Keyword Co-occurrences",
  type = "fruchterman",
  size = TRUE,
  edgesize = 5,
  labelsize = 0.7
)
Cairo::CairoJPEG(
  file = "Keyword Co-occurrences.jpeg",
  width = 10, height = 10, units = "in", dpi = 97
)
plot(netKeyWord$graph)
dev.off()


# Thematic Map: a thematic map based on co-word network analysis and
# clustering.
thematicMapres <- thematicMap(
  M,
  field = "ID",
  n = 250,
  minfreq = 5,
  size = 0.5,
  repel = TRUE
)
Cairo::CairoJPEG(
  file = "Thematic Map.jpeg",
  width = 10, height = 10, units = "in", dpi = 97
)
plot(thematicMapres$map)
dev.off()

# 

# Historical Direct Citation Network
# options(width=130)
# histResults <- histNetwork(M, min.citations = 1, sep = ";")
# net <- histPlot(histResults, n=5, size = 10, labelsize=5)

# Conceptual structure of the field: correspondence analysis (CA) on terms
# extracted from the keyword, title or abstract fields.
CS <- conceptualStructure(
  M,
  field = "ID",
  method = "CA",
  minDegree = 4,
  clust = 5,
  stemming = FALSE,
  labelsize = 10,
  documents = 10
)
# Results of the CA, MCA or MDS method
# plot(CS$res)
# Results of cluster analysis
#plot(CS$km.res)

# Conceptual structure map
Cairo::CairoJPEG(
  file = "Conceptual structure map.jpeg",
  width = 10, height = 10, units = "in", dpi = 97
)
plot(CS$graph_terms)
dev.off()

# Results of cluster analysis
Cairo::CairoJPEG(
  file = "Results of cluster analysis.jpeg",
  width = 10, height = 10, units = "in", dpi = 97
)
plot(CS$graph_dendogram)
dev.off()

# Factorial map of the documents with the highest contributions
Cairo::CairoJPEG(
  file = "Factorial map of the documents with the highest contributes.jpeg",
  width = 10, height = 10, units = "in", dpi = 97
)
plot(CS$graph_documents_Contrib)
dev.off()


